
/*	
The CDC classified COVID-19 transmission rates as follows:

	1.	Low Transmission:
			Fewer than 10 new cases per 100,000 people in the past 7 days.
			Less than 5% of COVID-19 tests coming back positive.
	2.	Moderate Transmission:
			Between 10 and 49.99 new cases per 100,000 people in the past 7 days.
			Between 5% and 7.99% of COVID-19 tests coming back positive.
	3.	Substantial Transmission:
			Between 50 and 99.99 new cases per 100,000 people in the past 7 days.
			Between 8% and 9.99% of COVID-19 tests coming back positive.
	4.	High Transmission:
			100 or more new cases per 100,000 people in the past 7 days.
			10% or more of COVID-19 tests coming back positive.

*/
* Start from a clean session: close any open log (ignoring the error if none
* is open), erase all snapshots, and clear memory.
capture log close
snapshot erase _all
clear all

* Pieces of the log file name for this script.
global filenum 01
global filename "gen_CDC"

* ${log_path} is not defined in this file; it must be set before running.
log using "${log_path}/${filenum}_${filename}.smcl", replace

*===============================*
*		Import CDC Data			*
*===============================*

	* Load the CDC state-by-day cases/deaths CSV, replacing whatever is in memory.
	* ${data_path} is not defined in this file; it must be set before running.
	import delimited "${data_path}/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv", clear

*===============================*
*		Clean CDC Data			*
*===============================*

	// Calendar variables
		* submission_date is parsed as month/day/year into a Stata daily date.
		generate date  = date(submission_date, "MDY")
		generate year  = year(date)
		generate month = month(date)
		generate dow   = dow(date)

		* Keep only March through June of 2020.
		keep if year == 2020 & inrange(month, 3, 6)

	// Week identifier
		* Sunday (dow == 0) opens a new week; the running count of Sundays
		* within each state, in date order, numbers that state's weeks.
		generate week_start = (dow == 0)
		bysort state (date): generate week_id = sum(week_start)

	// State clean-up
		* Remove the RMI, FSM, GU and PR rows.
		drop if inlist(state, "RMI", "FSM", "GU", "PR")

		* Recode NYC rows as NY so both are pooled by the later by(state) collapse.
		replace state = "NY" if state == "NYC"

*===============================*
*		Collapse CDC Data		*
*===============================*

	* Reduce to one row per state x week: counts are summed over the week, and
	* date/month are taken from the earliest row of the week (hence the sort).
		sort state date
		collapse (sum) new_case pnew_case new_death pnew_death (first) date month, by(state week_id)

		format %td date

*===============================*
*		Merge Additional Data	*
*===============================*

		* Attach state identifiers, keyed on the two-letter abbreviation.
		* Only rows matched in both files are kept.
			generate state_abbrev = state
			merge m:1 state_abbrev using "${data_path}/statastates.dta", keep(match) nogenerate

			rename state_fip statefip

		* Attach population, again keeping matched rows only.
			merge m:1 state_abbrev using "${data_path}/population.dta", keep(match) nogenerate

*===============================*
*		Panel Set Data			*
*===============================*

		* Declare the panel: statefip identifies the unit, date is the time variable.
		xtset statefip date

*===============================*
*		Setup Analysis Data		*
*===============================*

	* Identify relevant reference weeks: https://www.bls.gov/cps/definitions.htm#refweek
	* After the state x week collapse, date is the first day of each week, so
	* these four dates pick out the weeks that start on them.

		gen ref_week = inlist(date, td(08mar2020), td(12apr2020), td(10may2020), td(07jun2020))

	** Create CDC Classifications of Each Week (cases per 100k people)
		gen new_case_per_100k=(new_case/population)*100000

	* Bins are written as explicit half-open intervals so the result does not
	* depend on the order of the replace statements at the 50 and 100 boundaries.
	* Stata treats missing as larger than any number, so the top bin must
	* exclude missing explicitly; otherwise a missing rate (for example from a
	* missing population) would be classified as "high". Such rows stay missing.
		gen cdc_levels = .
			replace cdc_levels = 1 if new_case_per_100k < 10
			replace cdc_levels = 2 if new_case_per_100k >= 10 & new_case_per_100k < 50
			replace cdc_levels = 3 if new_case_per_100k >= 50 & new_case_per_100k < 100
			replace cdc_levels = 4 if new_case_per_100k >= 100 & !missing(new_case_per_100k)

		label define cdc 1 "low" 2 "moderate" 3 "substantial" 4 "high"
		label values cdc_levels cdc
		
	** Weeks of Interest
		* ref_2wk flags each reference week and the week immediately before it.
		sort state date
		by state (date): gen ref_2wk = (ref_week[_n]==1 | (ref_week[_n+1]==1 & ref_week[_n]==0))

		* ref_month assigns each flagged week to the month of the reference week
		* it belongs to. This matters when the prior week starts in the previous
		* calendar month: the week before 07jun2020 starts on 31may2020 and so
		* carries month==5. Grouping the 2-week statistics by month would put that
		* week in May's window and leave it out of June's.
		by state (date): gen ref_month = month if ref_week==1
		by state (date): replace ref_month = month[_n+1] if ref_week==0 & ref_week[_n+1]==1

*===================================*
*	Collapse Data to State x Month	*
*===================================*

	** reference week cases
		gen rw_cases = new_case if ref_week==1
		gen rw_cases_per100k = new_case_per_100k if ref_week==1

	** Sum up # of Cases in reference week and week prior
		* Computed within state x ref_month so that exactly the two flagged weeks
		* of each window are combined. Only flagged rows receive a value; the
		* unflagged rows are dropped below.
		bysort state ref_month: egen rw_2wk_cases = total(new_case) if ref_2wk==1

	** Get max CDC classification in reference week and week prior
		bysort state ref_month: egen cdc_levels_2wks = max(cdc_levels) if ref_2wk==1

		label values cdc_levels_2wks cdc

	** Cases by Month
		* month is the month of each week's first day, so a week that straddles
		* two months is counted entirely in the month in which it starts.
		bysort state month: egen month_cases = total(new_case)

	* Keep one row per state x month: the reference-week row.
	sort state month date
	keep if ref_week==1
	gen year=2020

	keep year month state statefip cdc_* rw_cases rw_cases_per100k rw_2wk_cases month_cases

*===================================*
*	save CDC Data by state by Month	*
*===================================*

	* Transmission-level variables
	label variable cdc_levels       "Trnsm Lvl, Ref Week"
	label variable cdc_levels_2wks  "Trnsm Lvl, Ref Week +1"

	* Case-count variables
	label variable rw_cases         "Cases, Ref Week"
	label variable rw_cases_per100k "Cases per 100k, Ref Week"
	label variable rw_2wk_cases     "Cases, Ref Week +1"
	label variable month_cases      "Cases, Month"

	* Write the state x month file, overwriting any existing copy.
	save "${data_path}/cdc_by_state_by_month.dta", replace

*===================================*
*	Reshape Wide by State			*
*===================================*
	* Drop everything that is not reshaped (rw_cases* matches rw_cases and
	* rw_cases_per100k).
	drop month_cases rw_cases* year

	local reshape_vars "cdc_levels cdc_levels_2wks rw_2wk_cases"

	* One row per state, with one column per variable x month.
	reshape wide `reshape_vars', i(state statefip) j(month)

	* Month abbreviations used as variable labels, looked up by month number.
	local mlab3 "Mar"
	local mlab4 "Apr"
	local mlab5 "May"
	local mlab6 "Jun"

	* Rename <var><m> to <var>_m<m> and label each column with its month.
	forvalues m = 3/6 {
		foreach v of local reshape_vars {
			rename `v'`m' `v'_m`m'
			label variable `v'_m`m' "`mlab`m''"
		}
	}
	
	** Make Thicker Bins for Case Counts
		* Month abbreviations used in the variable labels below.
		local mname4 "Apr"
		local mname5 "May"
		local mname6 "Jun"

	** Low indicator: transmission level is low or moderate (<50 per 100k)
		* The !missing() guard leaves the indicator missing when the level is
		* missing. Without it, Stata's "missing is larger than any number" rule
		* would turn a missing level into 0 here.
		forvalues m=4/6{
			gen covid_low_m`m'		= (cdc_levels_m`m'<=2)		if !missing(cdc_levels_m`m')
			gen covid_low_2wks_m`m' = (cdc_levels_2wks_m`m'<=2)	if !missing(cdc_levels_2wks_m`m')

			label var covid_low_m`m'		"RW <50 per 100k, `mname`m''"
			label var covid_low_2wks_m`m'	"RW+1 <50 per 100k, `mname`m''"
		}

	** Some-covid indicator: transmission level above low (>=10 per 100k)
		* Labels read ">=10": the indicator is 1 when the level exceeds 1, i.e. at
		* least 10 cases per 100k. The !missing() guard keeps a missing level from
		* being coded 1.
		forvalues m=4/6{
			gen some_covid_m`m'		 = (cdc_levels_m`m'>1)		if !missing(cdc_levels_m`m')
			gen some_covid_2wks_m`m' = (cdc_levels_2wks_m`m'>1)	if !missing(cdc_levels_2wks_m`m')

			label var some_covid_m`m' 		"RW >=10 per 100k, `mname`m''"
			label var some_covid_2wks_m`m' 	"RW+1 >=10 per 100k, `mname`m''"
		}
		
		
	** generate gradation in the covid case counts **

		* Cross the April and June reference-week low flags into four buckets:
		*   April low  / June low  -> 1      April low  / June high -> 2
		*   April high / June low  -> 3      April high / June high -> 4
		* A not-low April adds 2 and a not-low June adds 1 to a base of 1.
		* Rows where either flag is not 0/1 are left missing.
		gen covid_detail = 1 + 2*(covid_low_m4 == 0) + (covid_low_m6 == 0) ///
			if inlist(covid_low_m4, 0, 1) & inlist(covid_low_m6, 0, 1)

		label define buckets 1 "Apr Low June Low" 2 "Apr Low June High" 3 "Apr High June Low" 4 "Apr High June High"
		label values covid_detail buckets

	* Write the one-row-per-state file, overwriting any existing copy.
	save "${data_path}/cdc_by_state.dta", replace


log close

